{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f7927e81",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/stefan/.conda/envs/pts/lib/python3.8/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.4\n",
      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from pathlib import Path\n",
    "import glob\n",
    "from joblib import load\n",
    "from sklearn.metrics import r2_score"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "954b6c05",
   "metadata": {},
   "source": [
    "# look at stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "836b6c7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# we only include the anonomysed set as agreed with the data owners\n",
    "\n",
    "df_train = pd.read_csv(\"../nfi-data/train_split.csv\")\n",
    "df_val = pd.read_csv(\"../nfi-data/val_split.csv\")\n",
    "df_test = pd.read_csv(\"../nfi-data/test_split.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "229e28c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_train.eval(\"temp_diff_years = temp_diff_days / 365\", inplace=True)\n",
    "df_val.eval(\"temp_diff_years = temp_diff_days / 365\", inplace=True)\n",
    "df_test.eval(\"temp_diff_years = temp_diff_days / 365\", inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "01da773a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "919"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f7e8a4e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "variable_list = [\n",
    "    \"h_mean_1_\",\n",
    "    \"h_mean_2_\",\n",
    "    \"h_std_1_\",\n",
    "    \"h_std_2_\",\n",
    "    \"h_coov_1_\",\n",
    "    \"h_coov_2_\",\n",
    "    \"h_kur_1_\",\n",
    "    \"h_kur_2_\",\n",
    "    \"h_skew_1_\",\n",
    "    \"h_skew_2_\",\n",
    "    \"IR_\",\n",
    "    *[f\"h_q{i}_1_\" for i in [5, 10, 25, 50, 75, 90, 95, 99]],\n",
    "    *[f\"h_q{i}_2_\" for i in [5, 10, 25, 50, 75, 90, 95, 99]],\n",
    "    \"temp_diff_years\"\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "62dc65ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "target_list = [\"BMag_ha\", \"V_ha\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "a202a5b8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>h_mean_1_</th>\n",
       "      <th>h_mean_2_</th>\n",
       "      <th>h_std_1_</th>\n",
       "      <th>h_std_2_</th>\n",
       "      <th>h_coov_1_</th>\n",
       "      <th>h_coov_2_</th>\n",
       "      <th>h_kur_1_</th>\n",
       "      <th>h_kur_2_</th>\n",
       "      <th>h_skew_1_</th>\n",
       "      <th>h_skew_2_</th>\n",
       "      <th>...</th>\n",
       "      <th>h_q99_1_</th>\n",
       "      <th>h_q5_2_</th>\n",
       "      <th>h_q10_2_</th>\n",
       "      <th>h_q25_2_</th>\n",
       "      <th>h_q50_2_</th>\n",
       "      <th>h_q75_2_</th>\n",
       "      <th>h_q90_2_</th>\n",
       "      <th>h_q95_2_</th>\n",
       "      <th>h_q99_2_</th>\n",
       "      <th>temp_diff_years</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.00000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>7.756886</td>\n",
       "      <td>10.737537</td>\n",
       "      <td>5.021482</td>\n",
       "      <td>3.619087</td>\n",
       "      <td>1.067221</td>\n",
       "      <td>0.382178</td>\n",
       "      <td>22.067038</td>\n",
       "      <td>2.080255</td>\n",
       "      <td>0.551701</td>\n",
       "      <td>-0.235734</td>\n",
       "      <td>...</td>\n",
       "      <td>16.771935</td>\n",
       "      <td>4.393771</td>\n",
       "      <td>5.840024</td>\n",
       "      <td>8.442274</td>\n",
       "      <td>11.08420</td>\n",
       "      <td>13.304094</td>\n",
       "      <td>15.007245</td>\n",
       "      <td>15.913966</td>\n",
       "      <td>17.312287</td>\n",
       "      <td>-0.070031</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>5.650914</td>\n",
       "      <td>5.826700</td>\n",
       "      <td>2.618811</td>\n",
       "      <td>1.775645</td>\n",
       "      <td>1.104352</td>\n",
       "      <td>0.218708</td>\n",
       "      <td>280.693397</td>\n",
       "      <td>14.606087</td>\n",
       "      <td>4.220633</td>\n",
       "      <td>1.242230</td>\n",
       "      <td>...</td>\n",
       "      <td>7.569541</td>\n",
       "      <td>3.862539</td>\n",
       "      <td>4.677638</td>\n",
       "      <td>5.669329</td>\n",
       "      <td>6.37934</td>\n",
       "      <td>6.791960</td>\n",
       "      <td>7.077031</td>\n",
       "      <td>7.250701</td>\n",
       "      <td>7.457928</td>\n",
       "      <td>0.555424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-0.024825</td>\n",
       "      <td>1.112857</td>\n",
       "      <td>0.053250</td>\n",
       "      <td>0.087785</td>\n",
       "      <td>-4.656972</td>\n",
       "      <td>0.069645</td>\n",
       "      <td>-1.926685</td>\n",
       "      <td>-2.000000</td>\n",
       "      <td>-13.816676</td>\n",
       "      <td>-3.078436</td>\n",
       "      <td>...</td>\n",
       "      <td>0.070000</td>\n",
       "      <td>1.010000</td>\n",
       "      <td>1.020000</td>\n",
       "      <td>1.030000</td>\n",
       "      <td>1.07500</td>\n",
       "      <td>1.150000</td>\n",
       "      <td>1.197000</td>\n",
       "      <td>1.252500</td>\n",
       "      <td>1.330500</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>3.179662</td>\n",
       "      <td>6.239891</td>\n",
       "      <td>3.099313</td>\n",
       "      <td>2.383729</td>\n",
       "      <td>0.535194</td>\n",
       "      <td>0.262913</td>\n",
       "      <td>-1.173350</td>\n",
       "      <td>-0.621735</td>\n",
       "      <td>-0.683866</td>\n",
       "      <td>-0.871200</td>\n",
       "      <td>...</td>\n",
       "      <td>11.371750</td>\n",
       "      <td>1.590000</td>\n",
       "      <td>2.185000</td>\n",
       "      <td>3.743750</td>\n",
       "      <td>6.01250</td>\n",
       "      <td>8.045000</td>\n",
       "      <td>9.776000</td>\n",
       "      <td>10.440000</td>\n",
       "      <td>11.883950</td>\n",
       "      <td>-0.613699</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>6.672952</td>\n",
       "      <td>9.647054</td>\n",
       "      <td>4.635112</td>\n",
       "      <td>3.401080</td>\n",
       "      <td>0.798720</td>\n",
       "      <td>0.354456</td>\n",
       "      <td>-0.470218</td>\n",
       "      <td>-0.028935</td>\n",
       "      <td>0.003507</td>\n",
       "      <td>-0.356868</td>\n",
       "      <td>...</td>\n",
       "      <td>16.860200</td>\n",
       "      <td>2.865000</td>\n",
       "      <td>4.172000</td>\n",
       "      <td>7.060000</td>\n",
       "      <td>9.95000</td>\n",
       "      <td>12.360000</td>\n",
       "      <td>14.560000</td>\n",
       "      <td>15.696500</td>\n",
       "      <td>17.290000</td>\n",
       "      <td>0.068493</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>11.129150</td>\n",
       "      <td>14.899706</td>\n",
       "      <td>6.880495</td>\n",
       "      <td>4.625316</td>\n",
       "      <td>1.197607</td>\n",
       "      <td>0.444563</td>\n",
       "      <td>1.494342</td>\n",
       "      <td>1.175310</td>\n",
       "      <td>0.886396</td>\n",
       "      <td>0.223805</td>\n",
       "      <td>...</td>\n",
       "      <td>22.560900</td>\n",
       "      <td>5.934000</td>\n",
       "      <td>8.335000</td>\n",
       "      <td>12.132500</td>\n",
       "      <td>15.76000</td>\n",
       "      <td>18.273750</td>\n",
       "      <td>20.246000</td>\n",
       "      <td>21.307000</td>\n",
       "      <td>22.789950</td>\n",
       "      <td>0.410959</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>26.873604</td>\n",
       "      <td>28.328451</td>\n",
       "      <td>15.993492</td>\n",
       "      <td>14.319471</td>\n",
       "      <td>13.515706</td>\n",
       "      <td>2.933647</td>\n",
       "      <td>5577.448026</td>\n",
       "      <td>253.406503</td>\n",
       "      <td>68.145993</td>\n",
       "      <td>12.024318</td>\n",
       "      <td>...</td>\n",
       "      <td>44.660000</td>\n",
       "      <td>21.680000</td>\n",
       "      <td>24.490000</td>\n",
       "      <td>27.530000</td>\n",
       "      <td>29.80500</td>\n",
       "      <td>37.080000</td>\n",
       "      <td>40.994000</td>\n",
       "      <td>42.590000</td>\n",
       "      <td>46.510000</td>\n",
       "      <td>0.936986</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        h_mean_1_   h_mean_2_    h_std_1_    h_std_2_   h_coov_1_   h_coov_2_  \\\n",
       "count  919.000000  919.000000  919.000000  919.000000  919.000000  919.000000   \n",
       "mean     7.756886   10.737537    5.021482    3.619087    1.067221    0.382178   \n",
       "std      5.650914    5.826700    2.618811    1.775645    1.104352    0.218708   \n",
       "min     -0.024825    1.112857    0.053250    0.087785   -4.656972    0.069645   \n",
       "25%      3.179662    6.239891    3.099313    2.383729    0.535194    0.262913   \n",
       "50%      6.672952    9.647054    4.635112    3.401080    0.798720    0.354456   \n",
       "75%     11.129150   14.899706    6.880495    4.625316    1.197607    0.444563   \n",
       "max     26.873604   28.328451   15.993492   14.319471   13.515706    2.933647   \n",
       "\n",
       "          h_kur_1_    h_kur_2_   h_skew_1_   h_skew_2_  ...    h_q99_1_  \\\n",
       "count   919.000000  919.000000  919.000000  919.000000  ...  919.000000   \n",
       "mean     22.067038    2.080255    0.551701   -0.235734  ...   16.771935   \n",
       "std     280.693397   14.606087    4.220633    1.242230  ...    7.569541   \n",
       "min      -1.926685   -2.000000  -13.816676   -3.078436  ...    0.070000   \n",
       "25%      -1.173350   -0.621735   -0.683866   -0.871200  ...   11.371750   \n",
       "50%      -0.470218   -0.028935    0.003507   -0.356868  ...   16.860200   \n",
       "75%       1.494342    1.175310    0.886396    0.223805  ...   22.560900   \n",
       "max    5577.448026  253.406503   68.145993   12.024318  ...   44.660000   \n",
       "\n",
       "          h_q5_2_    h_q10_2_    h_q25_2_   h_q50_2_    h_q75_2_    h_q90_2_  \\\n",
       "count  919.000000  919.000000  919.000000  919.00000  919.000000  919.000000   \n",
       "mean     4.393771    5.840024    8.442274   11.08420   13.304094   15.007245   \n",
       "std      3.862539    4.677638    5.669329    6.37934    6.791960    7.077031   \n",
       "min      1.010000    1.020000    1.030000    1.07500    1.150000    1.197000   \n",
       "25%      1.590000    2.185000    3.743750    6.01250    8.045000    9.776000   \n",
       "50%      2.865000    4.172000    7.060000    9.95000   12.360000   14.560000   \n",
       "75%      5.934000    8.335000   12.132500   15.76000   18.273750   20.246000   \n",
       "max     21.680000   24.490000   27.530000   29.80500   37.080000   40.994000   \n",
       "\n",
       "         h_q95_2_    h_q99_2_  temp_diff_years  \n",
       "count  919.000000  919.000000       919.000000  \n",
       "mean    15.913966   17.312287        -0.070031  \n",
       "std      7.250701    7.457928         0.555424  \n",
       "min      1.252500    1.330500        -1.000000  \n",
       "25%     10.440000   11.883950        -0.613699  \n",
       "50%     15.696500   17.290000         0.068493  \n",
       "75%     21.307000   22.789950         0.410959  \n",
       "max     42.590000   46.510000         0.936986  \n",
       "\n",
       "[8 rows x 28 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_val[variable_list].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "cd534652",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>BMag_ha</th>\n",
       "      <th>V_ha</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>919.000000</td>\n",
       "      <td>919.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>112.378628</td>\n",
       "      <td>210.213042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>105.828078</td>\n",
       "      <td>195.837800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>29.292855</td>\n",
       "      <td>52.422856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>86.551500</td>\n",
       "      <td>154.638388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>165.194985</td>\n",
       "      <td>325.370432</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>668.081976</td>\n",
       "      <td>1176.918249</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          BMag_ha         V_ha\n",
       "count  919.000000   919.000000\n",
       "mean   112.378628   210.213042\n",
       "std    105.828078   195.837800\n",
       "min      0.000000     0.000000\n",
       "25%     29.292855    52.422856\n",
       "50%     86.551500   154.638388\n",
       "75%    165.194985   325.370432\n",
       "max    668.081976  1176.918249"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_val[target_list].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "958358bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([<AxesSubplot: ylabel='Frequency'>,\n",
       "       <AxesSubplot: ylabel='Frequency'>], dtype=object)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAGdCAYAAAD9kBJPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAAUlEQVR4nO3de1RVdf7/8deJywkUUEE4MCJRYk7iVGrpOJqaiqKZl76lqQlqrRzN0dS85LefVipWI1njV2uaAh0r7aJ97S5eu2ijonhrMsZQUCHMFMQLIOzfHy7PtyN44bgPHDjPx1p7rc5nf84+7/PROq8++7P3thiGYQgAAMBD3FDTBQAAAFQnwg8AAPAohB8AAOBRCD8AAMCjEH4AAIBHIfwAAACPQvgBAAAehfADAAA8indNF+AOysvLdfToUQUEBMhisdR0OQAA4BoYhqFTp04pIiJCN9xw7fM5hB9JR48eVWRkZE2XAQAAnJCTk6MmTZpcc3/Cj6SAgABJFwYvMDCwhqsBAADXorCwUJGRkfbf8WtF+JHsp7oCAwMJPwAA1DJVXbLCgmcAAOBRCD8AAMCjEH4AAIBHYc0PAKBOMwxD58+fV1lZWU2Xgiry8vKSt7e36behIfwAAOqskpIS5ebm6syZMzVdCpzk7++v8PBw+fr6mnZMwo+L3TTt0wptB+f1qYFKAMCzlJeXKysrS15eXoqIiJCvry83sq1FDMNQSUmJjh07pqysLMXExFTpRoZXQvgBANRJJSUlKi8vV2RkpPz9/Wu6HDjBz89PPj4+OnTokEpKSnTjjTeaclwWPAMA6jSzZgtQM1zx58ffCAAA4FEIPwAAwKOw5gcA4HEquxjFlTzhQpfExESdPHlSH330UU2XclXM/AAA4GYSExNlsVjsW3BwsHr16qXdu3fb+1zc99133zm8t7i4WMHBwbJYLNq4cWM1V147EH4AAHBDvXr1Um5urnJzc7Vu3Tp5e3vrvvvuc+gTGRmplJQUh7ZVq1apfv361VlqrUP4AQDADVmtVtlsNtlsNt1xxx2aOnWqcnJydOzYMXufhIQELV++XGfPnrW3vfXWW0pISKhwvKlTp6p58+by9/fXzTffrGeeeUalpaUOfWbPnq3Q0FAFBATo0Ucf1bRp03THHXdUqe6//vWvCg8PV3BwsMaOHevwGcuWLVPbtm0VEBAgm82mIUOGKD8/v0rHNwPhBwAAN1dUVKS3335bzZo1U3BwsL29TZs2io6O1ocffihJysnJ0VdffaVHHnmkwjECAgKUmpqq77//Xq+88oreeOMNvfzyy/b9b7/9tubMmaMXXnhB6enpatq0qRYvXlylOjds2KADBw5ow4YNWrJkiVJTU5WammrfX1JSoueff167du3SRx99pKysLCUmJlZtMEzAgmcAANzQJ598Yj99dfr0aYWHh+uTTz6pcN+bESNG6K233tKwYcOUkpKi3r17q3HjxhWO99///d/2f77ppps0adIkrVixQlOmTJEk/e1vf9OoUaM0YsQISdL/+3//T2vWrFFRUdE119ywYUMtXLhQXl5eatGihfr06aN169bpsccekySNHDnS3vfmm2/Wq6++qrvvvltFRUXVeqquRmd+vvrqK/Xt21cRERGyWCwOK8RLS0s1depUtWrVSvXq1VNERISGDx+uo0ePOhyjS5cuDovCLBaLBg8eXM3fBAAAc3Xt2lUZGRnKyMjQv/71L8XFxSk+Pl6HDh1y6Dds2DBt2bJFP/30k1JTUx0Cxm998MEH6tixo2w2m+rXr69nnnlG2dnZ9v379+/X3Xff7fCeS19fTcuWLeXl5WV/HR4e7nBaa+fOnerXr5+ioqIUEBCgLl26SJJDHdWhRsPP6dOndfvtt2vhwoUV9p05c0Y7duzQM888ox07dmjlypX68ccfdf/991fo+9hjj9kXheXm5ur111+vjvIBAHCZevXqqVmzZmrWrJnuvvtuvfnmmzp9+rTeeOMNh37BwcG67777NGrUKJ07d07x8fEVjvXdd99p8ODBio+P1yeffKKdO3dqxowZKikpceh36bPPDMOoUs0+Pj4VjldeXi7pwm9+XFyc6tevr2XLlmnbtm1atWqVJFWow9Vq9LRXfHx8pX9IkhQUFKS0tDSHtr/97W+6++67lZ2draZNm9rb/f39ZbPZXForAAA1yWKx6IYbbnBY3HzRyJEj1bt3b02dOtVh5uWib7/9VlFRUZoxY4a97dIZpFtvvVVbt251WC+0fft20+r/4Ycf9Msvv2jevHmKjIw0/fhVUavW/BQUFMhisahBgwYO7W+//baWLVumsLAwxcfHa+bMmQoICLjscYqLi1VcXGx/XVhY6KqSAQBwSnFxsfLy8iRJJ06c0MKFC1VUVKS+fftW6NurVy8dO3ZMgYGBlR6rWbNmys7O1vLly3XXXXfp008/tc+6XDRu3Dg99thjatu2rTp06KAVK1Zo9+7duvnmm035Pk2bNpWvr6/+9re/afTo0dq7d6+ef/55U45dVbUm/Jw7d07Tpk3TkCFDHP5whw4dqujoaNlsNu3du1fTp0/Xrl27Kswa/VZSUpKeffbZ6igbAOCGasMdl7/44guFh4dLunClVosWLfT+++/b18n8lsViUUhIyGWP1a9fPz355JN64oknVFxcrD59+uiZZ57RrFmz7H2GDh2qn376SZMnT9a5c+f00EMPKTExUVu3bjXl+zRu3Fipqal6+umn9eqrr6p169b661//WulyFlezGFU9oeciFotFq1atUv/+/SvsKy0t1YMPPqjs7Gxt3LjxsslWktLT09W2bVulp6erdevWlfapbOYnMjJSBQUFVzy2Myq7hXpt+JcOAGq7c+fOKSsrS9HR0brxxhtrupxaqUePHrLZbPrnP/9ZYzVc6c+xsLBQQUFBVf79dvuZn9LSUj300EPKysrS+vXrr/rlWrduLR8fH2VmZl42/FitVlmtVleUCwBArXTmzBm99tpr6tmzp7y8vPTuu+9q7dq1VzyTUlu5dfi5GHwyMzO1YcMGhxs7Xc6+fftUWlpqnyoEAABXZ7FY9Nlnn2n27NkqLi7Wrbfeqg8//FDdu3eXpCveh+fzzz9Xp06dqqvU61aj4aeoqEj/+c9/7K+zsrKUkZGhRo0aKSIiQv/1X/+lHTt26JNPPlFZWZl94VejRo3k6+urAwcO6O2331bv3r0VEhKi77//XpMmTdKdd96pP/3pTzX1tQAAqHX8/Py0du3ay+7PyMi47L7f/e53LqjIdWo0/Gzfvl1du3a1v544caKkC88qmTVrllavXi1JFZ4rsmHDBnXp0kW+vr5at26dXnnlFRUVFSkyMlJ9+vTRzJkzK73UDwAAOKdZs2Y1XYJpajT8dOnS5Yo3ULraWuzIyEht2rTJ7LIAAHWIm1zXAye54s+PB5sCAOqki3cbPnPmTA1Xgutx8c/v0rtHXw+3XvAMAICzvLy81KBBA/uzpfz9/Ss8vgHuyzAMnTlzRvn5+WrQoIGpy1kIPwCAOuvio49++3BN1C4NGjQw/RFWhB8AQJ1lsVgUHh6u0NBQlZaW1nQ5qCIfHx+XXMBE+AEA1HleXl5cBQw7FjwDAACPQvgBAAAehfADAAA8CuEHAAB4FMIPAADwKIQfAADgUQg/AADAoxB+AACARyH8AAAAj0L4AQAAHoXwAwAAPArhBwAAeBTCDwAA8Cg1Gn6++uor9e3bVxEREbJYLProo48c9huGoVmzZikiIkJ+fn7q0qWL9u3b59CnuLhY48aNU0hIiOrVq6f7779fhw8frsZvAQAAapMaDT+nT5/W7bffroULF1a6/8UXX1RycrIWLlyobdu2yWazqUePHjp16pS9z4QJE7Rq1SotX75c33zzjYqKinTfffeprKysur4GAACoRbxr8sPj4+MVHx9f6T7DMLRgwQLNmDFDAwcOlCQtWbJEYWFheuedd/T444+roKBAb775pv75z3+qe/fukqRly5YpMjJSa9euVc+ePavtuwAAgNrBqZmfrKwss+uo9DPy8vIUFxdnb7NarercubM2b94sSUpPT1dpaalDn4iICMXGxtr7VKa4uFiFhYUOGwAA8AxOhZ9mzZqpa9euWrZsmc6dO2d2TZKkvLw8SVJYWJhDe1hYmH1fXl6efH191bBhw8v2qUxSUpKCgoLsW2RkpMnVAwAAd+VU+Nm1a5fuvPNOTZo0STabTY8//ri2bt1qdm2SJIvF4vDaMIwKbZe6Wp/p06eroKDAvuXk5JhSKwAAcH9OhZ/Y2FglJyfryJEjSklJUV5enjp27KiWLVsqOTlZx44du+7CbDabJFWYwcnPz7fPBtlsNpWUlOjEiROX7VMZq9WqwMBAhw0AAHiG67ray9vbWwMGDNB7772nF154QQcOHNDkyZPVpEkTDR8+XLm5uU4fOzo6WjabTWlpafa2kpISbdq0SR06dJAktWnTRj4+Pg59cnNztXfvXnsfAACA37qu8LN9+3aNGTNG4eHhSk5O1uTJk3XgwAGtX79eR44cUb9+/a74/qKiImVkZCgjI0PShUXOGRkZys7OlsVi0YQJEzR37lytWrVKe/fuVWJiovz9/TVkyBBJUlBQkEaNGqVJkyZp3bp12rlzp4YNG6ZWrVrZr/4CAAD4LacudU9OTlZKSor279+v3r17a+nSperdu7duuOFCloqOjtbrr7+uFi1aXPE427dvV9euXe2vJ06cKElKSEhQamqqpkyZorNnz2rMmDE6ceKE2rVrpzVr1iggIMD+npdfflne3t566KGHdPbsWXXr1k2pqany8vJy5qsBAIA6zmIYhlHVN8XExGjkyJEaMWKEfW3OpUpKSvTuu+8qISHhuot0tcLCQgUFBamgoMD09T83Tfu0QtvBeX1M/QwAADyRs7/fTs38ZGZmXrWPr69vrQg+AADAszi15iclJUXvv/9+hfb3339fS5Ysue6iAAAAXMWp8DNv3jyFhIRUaA8NDdXcuXOvuygAAABXcSr8HDp0SNHR0RXao6KilJ2dfd1FAQAAuIpT4Sc0NFS7d++u0L5r1y4FBwdfd1EAAACu4lT4GTx4sP7yl79ow4YNKisrU1lZmdavX6/x48dr8ODBZtcIAABgGqeu9po9e7YOHTqkbt26ydv7wiHKy8s1fPhw1vwAAAC35lT48fX11YoVK/T8889r165d8vPzU6tWrRQVFWV2fQAAAKZyKvxc1Lx5czVv3tysWgAAAFzOqfBTVlam1NRUrVu3Tvn5+SovL3fYv379elOKAwAAMJtT4Wf8+PFKTU1Vnz59FBsbK4vFYnZdAAAALuFU+Fm+fLnee+899e7d2+x6AAAAXMqpS919fX3VrFkzs2sBAABwOafCz6RJk/TKK6/IiQfCAwAA1CinTnt988032rBhgz7//HO1bNlSPj4+DvtXrlxpSnEAAABmcyr8NGjQQAMGDDC7FgAAAJdzKvykpKSYXQcAAEC1cGrNjySdP39ea9eu1euvv65Tp05Jko4ePaqioiLTigMAADCbU+Hn0KFDatWqlfr166exY8fq2LFjkqQXX3xRkydPNrXAm266SRaLpcI2duxYSVJiYmKFfe3btze1BgAAUHc4fZPDtm3bateuXQoODra3DxgwQI8++qhpxUnStm3bVFZWZn+9d+9e9ejRQw8++KC9rVevXg6n4nx9fU2tAQAA1B1OX+317bffVggZUVFROnLkiCmFXdS4cWOH1/PmzdMtt9yizp0729usVqtsNpupnwsAAOomp057lZeXO8zGXHT48GEFBARcd1GXU1JSomXLlmnkyJEOj9TYuHGjQkND1bx5cz322GPKz8+/4nGKi4tVWFjosAEAAM/gVPjp0aOHFixYYH9tsVhUVFSkmTNnuvSRFx999JFOnjypxMREe1t8fLzefvttrV+/XvPnz9e2bdt07733qri4+LLHSUpKUlBQkH2LjIx0Wc0AAMC9WAwnbtN89OhRde3aVV5eXsrMzFTbtm2VmZmpkJAQffXVVwoNDXVFrerZs6d8fX318ccfX7ZPbm6uoqKitHz5cg0cOLDSPsXFxQ7hqLCwUJGRkSooKFBgYKCpNd807dMKbQfn9TH1MwAA8ESFhYUKCgqq8u+3U2t+IiIilJGRoXfffVc7duxQeXm5Ro0apaFDh8rPz8+ZQ17VoUOHtHbt2qvePTo8PFxRUVHKzMy8bB+r1Sqr1Wp2iQAAoBZwKvxIkp+fn0aOHKmRI0eaWc9lpaSkKDQ0VH36XHnW5Pjx48rJyVF4eHi11AUAAGoXp8LP0qVLr7h/+PDhThVzOeXl5UpJSVFCQoK8vf+v5KKiIs2aNUsPPPCAwsPDdfDgQT399NMKCQnh8RsAAKBSTt/n57dKS0t15swZ+fr6yt/f3/Tws3btWmVnZ1eYZfLy8tKePXu0dOlSnTx5UuHh4eratatWrFjh0qvOAABA7eVU+Dlx4kSFtszMTP35z3/WU089dd1FXSouLk6Vrcv28/PTl19+afrnAQCAusvpZ3tdKiYmRvPmzaswKwQAAOBOTAs/0oXTUEePHjXzkAAAAKZy6rTX6tWrHV4bhqHc3FwtXLhQf/rTn0wpDAAAwBWcCj/9+/d3eG2xWNS4cWPde++9mj9/vhl11WmX3viQmx4CAFB9nAo/5eXlZtcBAABQLUxd8wMAAODunJr5mThx4jX3TU5OduYjAAAAXMKp8LNz507t2LFD58+f16233ipJ+vHHH+Xl5aXWrVvb+1ksFnOqBAAAMIlT4adv374KCAjQkiVL1LBhQ0kXbnw4YsQIderUSZMmTTK1SAAAALM4teZn/vz5SkpKsgcfSWrYsKFmz57N1V4AAMCtORV+CgsL9fPPP1doz8/P16lTp667KAAAAFdx6rTXgAEDNGLECM2fP1/t27eXJH333Xd66qmnNHDgQFML9ASX3vdH4t4/AAC4ilPh57XXXtPkyZM1bNgwlZaWXjiQt7dGjRqll156ydQCAQAAzORU+PH399eiRYv00ksv6cCBAzIMQ82aNVO9evXMrg8AAMBU13WTw9zcXOXm5qp58+aqV6+eDMMwqy4AAACXcCr8HD9+XN26dVPz5s3Vu3dv5ebmSpIeffRRLnMHAABuzanw8+STT8rHx0fZ2dny9/e3tw8aNEhffPGFacUBAACYzak1P2vWrNGXX36pJk2aOLTHxMTo0KFDphQGR1wRBgCAOZya+Tl9+rTDjM9Fv/zyi6xW63UXddGsWbNksVgcNpvNZt9vGIZmzZqliIgI+fn5qUuXLtq3b59pnw8AAOoep8LPPffco6VLl9pfWywWlZeX66WXXlLXrl1NK06SWrZsaV9YnZubqz179tj3vfjii0pOTtbChQu1bds22Ww29ejRgxstAgCAy3LqtNdLL72kLl26aPv27SopKdGUKVO0b98+/frrr/r222/NLdDb22G25yLDMLRgwQLNmDHDfmPFJUuWKCwsTO+8844ef/xxU+sAAAB1g1MzP7fddpt2796tu+++Wz169NDp06c1cOBA7dy5U7fccoupBWZmZioiIkLR0dEaPHiwfvrpJ0lSVlaW8vLyFBcXZ+9rtVrVuXNnbd68+YrHLC4uVmFhocMGAAA8Q5VnfkpLSxUXF6fXX39dzz77rCtqsmvXrp2WLl2q5s2b6+eff9bs2bPVoUMH7du3T3l5eZKksLAwh/eEhYVdddF1UlKSy2sHAADuqcozPz4+Ptq7d68sFosr6nEQHx+vBx54QK1atVL37t316acXrnhasmSJvc+ldRiGcdXapk+froKCAvuWk5NjfvEAAMAtOXXaa/jw4XrzzTfNruWq6tWrp1atWikzM9O+DujiDNBF+fn5FWaDLmW1WhUYGOiw1RU3TfvUYQMAAI6cWvBcUlKif/zjH0pLS1Pbtm0rPNMrOTnZlOIuVVxcrH//+9/q1KmToqOjZbPZlJaWpjvvvNNe16ZNm/TCCy+45PMBAEDtV6Xw89NPP+mmm27S3r171bp1a0nSjz/+6NDHzNNhkydPVt++fdW0aVPl5+dr9uzZKiwsVEJCgiwWiyZMmKC5c+cqJiZGMTExmjt3rvz9/TVkyBDTanBnzOwAAFB1VQo/MTExys3N1YYNGyRdeJzFq6++etXTTM46fPiwHn74Yf3yyy9q3Lix2rdvr++++05RUVGSpClTpujs2bMaM2aMTpw4oXbt2mnNmjUKCAhwST0AAKD2q1L4ufSp7Z9//rlOnz5takG/tXz58ivut1gsmjVrlmbNmuWyGgAAQN3i1ILniy4NQwAAAO6uSuHn4vO1Lm0DAACoLap82isxMdH+8NJz585p9OjRFa72WrlypXkVAgAAmKhK4SchIcHh9bBhw0wtBgAAwNWqFH5SUlJcVQdcpLLL4Q/O61MDlQAA4B6cuskhXK+m7+Fz6ecTmAAAdcV1Xe0FAABQ2xB+AACAR+G0lwfilBYAwJMRflDj64sAAKhOnPYCAAAehfADAAA8CuEHAAB4FMIPAADwKIQfAADgUQg/AADAo3CpO0zDc8QAALUB4QdO4/5AAIDayK1PeyUlJemuu+5SQECAQkND1b9/f+3fv9+hT2JioiwWi8PWvn37GqoYl7pp2qcOGwAANc2tw8+mTZs0duxYfffdd0pLS9P58+cVFxen06dPO/Tr1auXcnNz7dtnn31WQxUDAAB359anvb744guH1ykpKQoNDVV6erruuecee7vVapXNZqvu8gAAQC3k1uHnUgUFBZKkRo0aObRv3LhRoaGhatCggTp37qw5c+YoNDT0sscpLi5WcXGx/XVhYaFrCq5DOGUFAKgr3Pq0128ZhqGJEyeqY8eOio2NtbfHx8fr7bff1vr16zV//nxt27ZN9957r0O4uVRSUpKCgoLsW2RkZHV8BQAA4AYshmEYNV3EtRg7dqw+/fRTffPNN2rSpMll++Xm5ioqKkrLly/XwIEDK+1T2cxPZGSkCgoKFBgYaGrdzJg44tJ3AIBZCgsLFRQUVOXf71px2mvcuHFavXq1vvrqqysGH0kKDw9XVFSUMjMzL9vHarXKarWaXSauAfcCAgDUNLcOP4ZhaNy4cVq1apU2btyo6Ojoq77n+PHjysnJUXh4eDVUCAAAahu3Dj9jx47VO++8o//93/9VQECA8vLyJElBQUHy8/NTUVGRZs2apQceeEDh4eE6ePCgnn76aYWEhGjAgAE1XD3MdOmMEbNFAABnuXX4Wbx4sSSpS5cuDu0pKSlKTEyUl5eX9uzZo6VLl+rkyZMKDw9X165dtWLFCgUEBNRAxQAAwN25dfi52lpsPz8/ffnll9VUDQAAqAtqzaXuAAAAZiD8AAAAj+LWp73gmbg3EgDAlZj5AQAAHoWZH9Q4s2Z6uBweAHAtCD+olZwNTAQkAACnvQAAgEch/AAAAI9C+AEAAB6FNT/waDxlHgA8D+EHMMG1LMAmVAGAe+C0FwAA8CjM/ACX4HJ4AKjbmPkBAAAehfADAAA8Cqe9UGfxgFQAQGUIP4ATnAlWXFYPAO6hzoSfRYsW6aWXXlJubq5atmypBQsWqFOnTjVdFuoAV84gcYk8AFS/OrHmZ8WKFZowYYJmzJihnTt3qlOnToqPj1d2dnZNlwYAANyMxTAMo6aLuF7t2rVT69attXjxYnvb73//e/Xv319JSUlXfX9hYaGCgoJUUFCgwMBAU2tj3QnMVtlMkDOX53MaDkBt5+zvd62f+SkpKVF6erri4uIc2uPi4rR58+YaqgoAALirWr/m55dfflFZWZnCwsIc2sPCwpSXl1fpe4qLi1VcXGx/XVBQIOlCgjRbefEZ048Jz9b0yfed6rP32Z4Oryv7u3kt/w7Ezvzyqn2uxaX1VHZcZ/pU5lpqvpbjVNdxAVybi//NqupJrFoffi6yWCwOrw3DqNB2UVJSkp599tkK7ZGRkS6pDXAHQQvM6WMWs+oxq2ZXfffqHFPAU506dUpBQUHX3L/Wh5+QkBB5eXlVmOXJz8+vMBt00fTp0zVx4kT76/Lycv36668KDg6+bGByRmFhoSIjI5WTk2P6WqK6ijGrOsasahivqmPMqo4xqxpnx8swDJ06dUoRERFV+rxaH358fX3Vpk0bpaWlacCAAfb2tLQ09evXr9L3WK1WWa1Wh7YGDRq4rMbAwED+8lcRY1Z1jFnVMF5Vx5hVHWNWNc6MV1VmfC6q9eFHkiZOnKhHHnlEbdu21R//+Ef9/e9/V3Z2tkaPHl3TpQEAADdTJ8LPoEGDdPz4cT333HPKzc1VbGysPvvsM0VFRdV0aQAAwM3UifAjSWPGjNGYMWNqugwHVqtVM2fOrHCKDZfHmFUdY1Y1jFfVMWZVx5hVTXWPV524ySEAAMC1qvU3OQQAAKgKwg8AAPAohB8AAOBRCD8AAMCjEH4AAIBHIfwAAACPQvgBAAAehfADAAA8CuEHAAB4FMIPAADwKIQfAADgUQg/AADAoxB+AACARyH8AAAAj0L4AQAAHoXwAwAAPArhBwAAeBTCDwAA8CiEHwAA4FEIPwAAwKMQfgAAgEch/AAAAI9C+AEAAB6F8AMAADwK4QcAAHgU75ouwB2Ul5fr6NGjCggIkMViqelyAADANTAMQ6dOnVJERIRuuOHa53MIP5KOHj2qyMjImi4DAAA4IScnR02aNLnm/oQfSQEBAZIuDF5gYGANVwMAAK5FYWGhIiMj7b/j14rwI9lPdQUGBhJ+AACoZaq6ZIUFzwAAwKMQfgAAgEch/AAAAI/Cmh8AAFykrKxMpaWlNV1GreXj4yMvLy/Tj0v4AQDAZIZhKC8vTydPnqzpUmq9Bg0ayGazmXofPsKPq80KqqStoPrrAABUm4vBJzQ0VP7+/txA1wmGYejMmTPKz8+XJIWHh5t2bMIPAAAmKisrswef4ODgmi6nVvPz85Mk5efnKzQ01LRTYCx4BgDARBfX+Pj7+9dwJXXDxXE0c+0U4QcAABfgVJc5XDGOhB8AAOBRCD8AAOC6bdy4URaLpVZc4caCZwAAqktlVwC77LOqdmVx3759dfbsWa1du7bCvi1btqhDhw5KT09X69atzaqwxjDzAwAANGrUKK1fv16HDh2qsO+tt97SHXfcUSeCj0T4AQAAku677z6FhoYqNTXVof3MmTNasWKFRo0adU3HSU9PV9u2beXv768OHTpo//799n0HDhxQv379FBYWpvr16+uuu+6qdKbJ1Qg/AABA3t7eGj58uFJTU2UYhr39/fffV0lJiYYOHXpNx5kxY4bmz5+v7du3y9vbWyNHjrTvKyoqUu/evbV27Vrt3LlTPXv2VN++fZWdnW3697kSwg8AAJAkjRw5UgcPHtTGjRvtbW+99ZYGDhyohg0bXtMx5syZo86dO+u2227TtGnTtHnzZp07d06SdPvtt+vxxx9Xq1atFBMTo9mzZ+vmm2/W6tWrXfF1LovwAwAAJEktWrRQhw4d9NZbb0m6cJrq66+/dpi9uZo//OEP9n+++EiKi4+oOH36tKZMmaLbbrtNDRo0UP369fXDDz8w8wMAAGrOqFGj9OGHH6qwsFApKSmKiopSt27drvn9Pj4+9n++eIPC8vJySdJTTz2lDz/8UHPmzNHXX3+tjIwMtWrVSiUlJeZ+iasg/AAAALuHHnpIXl5eeuedd7RkyRKNGDHCtLssf/3110pMTNSAAQPUqlUr2Ww2HTx40JRjVwXhBwAA2NWvX1+DBg3S008/raNHjyoxMdG0Yzdr1kwrV65URkaGdu3apSFDhthnhaoT4QcAADgYNWqUTpw4oe7du6tp06amHffll19Ww4YN1aFDB/Xt21c9e/askXsHWYzfXs/moQoLCxUUFKSCggIFBgaae/DK7uZZxbtuAgBqj3PnzikrK0vR0dG68cYba7qcWu9K4+ns7zczPwAAwKMQfgAAwFWNHj1a9evXr3QbPXp0TZdXJTzYFAAAXNVzzz2nyZMnV7rP9CUjLkb4AQAAVxUaGqrQ0NCaLsMUnPYCAAAepUbDz1dffaW+ffsqIiJCFotFH330kcN+wzA0a9YsRUREyM/PT126dNG+ffsc+hQXF2vcuHEKCQlRvXr1dP/99+vw4cPV+C0AAKioJu5fUxe5Yhxr9LTX6dOndfvtt2vEiBF64IEHKux/8cUXlZycrNTUVDVv3lyzZ89Wjx49tH//fgUEBEiSJkyYoI8//ljLly9XcHCwJk2apPvuu0/p6eny8vKq7q8EAPBwvr6+uuGGG3T06FE1btxYvr6+pt0h2ZMYhqGSkhIdO3ZMN9xwg3x9fU07ttvc58disWjVqlXq37+/pAtfOiIiQhMmTNDUqVMlXZjlCQsL0wsvvKDHH39cBQUFaty4sf75z39q0KBBkqSjR48qMjJSn332mXr27HlNn819fgAAZiopKVFubq7OnDlT06XUev7+/goPD680/Dj7++22C56zsrKUl5enuLg4e5vValXnzp21efNmPf7440pPT1dpaalDn4iICMXGxmrz5s2XDT/FxcUqLi62vy4sLHTdFwEAeBxfX181bdpU58+fV1lZWU2XU2t5eXnJ29vb9Jkztw0/eXl5kqSwsDCH9rCwMB06dMjex9fXVw0bNqzQ5+L7K5OUlKRnn33W5IoBAPg/FotFPj4+Dk85h3tw+6u9Lk17hmFcNQFerc/06dNVUFBg33JyckypFQAAuD+3DT82m02SKszg5Ofn22eDbDabSkpKdOLEicv2qYzValVgYKDDBgAAPIPbhp/o6GjZbDalpaXZ20pKSrRp0yZ16NBBktSmTRv5+Pg49MnNzdXevXvtfQAAAH6rRtf8FBUV6T//+Y/9dVZWljIyMtSoUSM1bdpUEyZM0Ny5cxUTE6OYmBjNnTtX/v7+GjJkiCQpKChIo0aN0qRJkxQcHKxGjRpp8uTJatWqlbp3715TXwsAALixGg0/27dvV9euXe2vJ06cKElKSEhQamqqpkyZorNnz2rMmDE6ceKE2rVrpzVr1tjv8SNJL7/8sry9vfXQQw/p7Nmz6tatm1JTU7nHDwAAqJTb3OenJnGfHwAAah9nf7/dds0PAACAKxB+AACARyH8AAAAj0L4AQAAHoXwAwAAPArhBwAAeBTCDwAA8CiEHwAA4FEIPwAAwKMQfgAAgEdxKvxkZWWZXQcAAEC1cCr8NGvWTF27dtWyZct07tw5s2sCAABwGafCz65du3TnnXdq0qRJstlsevzxx7V161azawMAADCdU+EnNjZWycnJOnLkiFJSUpSXl6eOHTuqZcuWSk5O1rFjx8yuEwAAwBTXteDZ29tbAwYM0HvvvacXXnhBBw4c0OTJk9WkSRMNHz5cubm5ZtUJAABgiusKP9u3b9eYMWMUHh6u5ORkTZ48WQcOHND69et15MgR9evXz6w6AQAATOHtzJuSk5OVkpKi/fv3q3fv3lq6dKl69+6tG264kKWio6P1+uuvq0WLFqYWCwAAcL2cCj+LFy/WyJEjNWLECNlstkr7NG3aVG+++eZ1FQcAAGA2p8JPZmbmVfv4+voqISHBmcMDAAC4jFNrflJSUvT+++9XaH///fe1ZMmS6y4KAADAVZwKP/PmzVNISEiF9tDQUM2dO/e6iwIAAHAVp8LPoUOHFB0dXaE9KipK2dnZ110UAACAqzgVfkJDQ7V79+4K7bt27VJwcPB1F/VbN910kywWS4Vt7NixkqTExMQK+9q3b29qDQAAoO5wasHz4MGD9Ze//EUBAQG65557JEmbNm3S+PHjNXjwYFML3LZtm8rKyuyv9+7dqx49eujBBx+0t/Xq1UspKSn2176+vqbWAAAA6g6nws/s2bN16NAhdevWTd7eFw5RXl6u4cOHm77mp3Hjxg6v582bp1tuuUWdO3e2t1mt1stecg8AAPBbToUfX19frVixQs8//7x27dolPz8/tWrVSlFRUWbX56CkpETLli3TxIkTZbFY7O0bN25UaGioGjRooM6dO2vOnDkKDQ297HGKi4tVXFxsf11YWOjSugEAgPtwKvxc1Lx5czVv3tysWq7qo48+0smTJ5WYmGhvi4+P14MPPqioqChlZWXpmWee0b333qv09HRZrdZKj5OUlKRnn322mqoGAADuxGIYhlHVN5WVlSk1NVXr1q1Tfn6+ysvLHfavX7/etAJ/q2fPnvL19dXHH3982T65ubmKiorS8uXLNXDgwEr7VDbzExkZqYKCAgUGBppb9KygStoKzP0MAAA8UGFhoYKCgqr8++3UzM/48eOVmpqqPn36KDY21uEUlKscOnRIa9eu1cqVK6/YLzw8XFFRUVe8C7XVar3srBAAAKjbnAo/y5cv13vvvafevXubXc9lpaSkKDQ0VH369Lliv+PHjysnJ0fh4eHVVBkAAKhNnLrPj6+vr5o1a2Z2LZdVXl6ulJQUJSQk2K8uk6SioiJNnjxZW7Zs0cGDB7Vx40b17dtXISEhGjBgQLXVBwAAag+nws+kSZP0yiuvyInlQk5Zu3atsrOzNXLkSId2Ly8v7dmzR/369VPz5s2VkJCg5s2ba8uWLQoICKiW2gAAQO3i1Gmvb775Rhs2bNDnn3+uli1bysfHx2H/1dblVFVcXFylQcvPz09ffvmlqZ8FAADqNqfCT4MGDTitBAAAaiWnws9vHyUBAABQmzi15keSzp8/r7Vr1+r111/XqVOnJElHjx5VUVGRacUBAACYzamZn0OHDqlXr17Kzs5WcXGxevTooYCAAL344os6d+6cXnvtNbPrBAAAMIVTMz/jx49X27ZtdeLECfn5+dnbBwwYoHXr1plWHAAAgNmcvtrr22+/la+vr0N7VFSUjhw5YkphAAAAruDUzE95ebnKysoqtB8+fJj76wAAALfmVPjp0aOHFixYYH9tsVhUVFSkmTNnVusjLwAAAKrKqdNeL7/8srp27arbbrtN586d05AhQ5SZmamQkBC9++67ZtcIAABgGqfCT0REhDIyMvTuu+9qx44dKi8v16hRozR06FCHBdAAAADuxqnwI114tMTIkSMrPG8LAADAnTkVfpYuXXrF/cOHD3eqGAAAAFdzKvyMHz/e4XVpaanOnDkjX19f+fv7E34AAIDbcupqrxMnTjhsRUVF2r9/vzp27MiCZwAA4NacfrbXpWJiYjRv3rwKs0IAAADuxLTwI0leXl46evSomYcEAAAwlVNrflavXu3w2jAM5ebmauHChfrTn/5kSmEAAACu4FT46d+/v8Nri8Wixo0b695779X8+fPNqAsAAMAlnAo/5eXlZtcBAABQLUxd8wMAAODunJr5mThx4jX3TU5OduYjAAAAXMKp8LNz507t2LFD58+f16233ipJ+vHHH+Xl5aXWrVvb+1ksFnOqBAAAMIlT4adv374KCAjQkiVL1LBhQ0kXbnw4YsQIderUSZMmTTK1SAAAALM4teZn/vz5SkpKsgcfSWrYsKFmz55t6tVes2bNksVicdhsNpt9v2EYmjVrliIiIuTn56cuXbpo3759pn0+AACoe5wKP4WFhfr5558rtOfn5+vUqVPXXdRvtWzZUrm5ufZtz5499n0vvviikpOTtXDhQm3btk02m009evQwvQYAAFB3OBV+BgwYoBEjRuiDDz7Q4cOHdfjwYX3wwQcaNWqUBg4caGqB3t7estls9q1x48aSLsz6LFiwQDNmzNDAgQMVGxurJUuW6MyZM3rnnXdMrQEAANQdToWf1157TX369NGwYcMUFRWlqKgoDR06VPHx8Vq0aJGpBWZmZioiIkLR0dEaPHiwfvrpJ0lSVlaW8vLyFBcXZ+9rtVrVuXNnbd68+YrHLC4uVmFhocMGAAA8g1Phx9/fX4sWLdLx48ftV379+uuvWrRokerVq2dace3atdPSpUv15Zdf6o033lBeXp46dOig48ePKy8vT5IUFhbm8J6wsDD7vstJSkpSUFCQfYuMjDStZgAA4N6u6yaHF9fhNG/eXPXq1ZNhGGbVJUmKj4/XAw88oFatWql79+769NNPJUlLliyx97n0cnrDMK56if306dNVUFBg33JyckytGwAAuC+nws/x48fVrVs3NW/eXL1791Zubq4k6dFHH3XpZe716tVTq1atlJmZab/q69JZnvz8/AqzQZeyWq0KDAx02AAAgGdwKvw8+eST8vHxUXZ2tvz9/e3tgwYN0hdffGFacZcqLi7Wv//9b4WHhys6Olo2m01paWn2/SUlJdq0aZM6dOjgshoAAEDt5tRNDtesWaMvv/xSTZo0cWiPiYnRoUOHTClMkiZPnqy+ffuqadOmys/P1+zZs1VYWKiEhARZLBZNmDBBc+fOVUxMjGJiYjR37lz5+/tryJAhptUAAADqFqfCz+nTpx1mfC765ZdfZLVar7uoiw4fPqyHH35Yv/zyixo3bqz27dvru+++U1RUlCRpypQpOnv2rMaMGaMTJ06oXbt2WrNmjQICAkyrAQAA1C0Ww4lVyn369FHr1q31/PPPKyAgQLt371ZUVJQGDx6s8vJyffDBB66o1WUKCwsVFBSkgoIC89f/zAqqpK3A3M8AAMADOfv77dTMz0svvaQuXbpo+/btKikp0ZQpU7Rv3z79+uuv+vbbb505JAAAQLVwasHzbbfdpt27d+vuu+9Wjx49dPr0aQ0cOFA7d+7ULbfcYnaNAAAApqnyzE9paani4uL0+uuv69lnn3VFTQAAAC5T5ZkfHx8f7d2796o3EgQAAHBHTp32Gj58uN58802zawEAAHA5pxY8l5SU6B//+IfS0tLUtm3bCs/zSk5ONqU4AAAAs1Up/Pz000+66aabtHfvXrVu3VqS9OOPPzr04XQYAABwZ1UKPzExMcrNzdWGDRskXXicxauvvnrVZ2kBAAC4iyqt+bn0foiff/65Tp8+bWpBAAAAruTUgueLnLg5NAAAQI2qUvixWCwV1vSwxgcAANQmVVrzYxiGEhMT7Q8vPXfunEaPHl3haq+VK1eaV6En4PlfAABUmyqFn4SEBIfXw4YNM7UYAAAAV6tS+ElJSXFVHZ6lspkeAABQLa5rwTMAAEBtQ/gBAAAehfADAAA8CuEHAAB4FMIPAADwKE491R01gHsBAQBgCsIPnHdpICOMAQBqAU57AQAAj+LW4ScpKUl33XWXAgICFBoaqv79+2v//v0OfRITE+3PHLu4tW/fvoYqBgAA7s6tT3tt2rRJY8eO1V133aXz589rxowZiouL0/fff+/wPLFevXo53H3a19e3Jsp1D2adinLmOKxLAgDUAm4dfr744guH1ykpKQoNDVV6erruuecee7vVapXNZqvu8moHAgkAAA7cOvxcqqDgwo92o0aNHNo3btyo0NBQNWjQQJ07d9acOXMUGhpaEyWax5XP/2KhMgDAg9Wa8GMYhiZOnKiOHTsqNjbW3h4fH68HH3xQUVFRysrK0jPPPKN7771X6enpslqtlR6ruLhYxcXF9teFhYUurx8AALiHWhN+nnjiCe3evVvffPONQ/ugQYPs/xwbG6u2bdsqKipKn376qQYOHFjpsZKSkvTss8+6tF4AAOCe3Ppqr4vGjRun1atXa8OGDWrSpMkV+4aHhysqKkqZmZmX7TN9+nQVFBTYt5ycHLNLBgAAbsqtZ34Mw9C4ceO0atUqbdy4UdHR0Vd9z/Hjx5WTk6Pw8PDL9rFarZc9JQYAAOo2t575GTt2rJYtW6Z33nlHAQEBysvLU15ens6ePStJKioq0uTJk7VlyxYdPHhQGzduVN++fRUSEqIBAwbUcPUAAMAdufXMz+LFiyVJXbp0cWhPSUlRYmKivLy8tGfPHi1dulQnT55UeHi4unbtqhUrViggIKAGKq6lruXKMldefXYtuEINAGAStw4/hmFccb+fn5++/PLLaqrGDdV0IMH1I9QBQLVz69NeAAAAZiP8AAAAj+LWp73goTidBwBwIcIPXIs1LQAAN0P4QfUy60Gr13Kca7qKjTAGAJ6GNT8AAMCjEH4AAIBH4bQX6g6zFkq7ap0SC7kBwC0w8wMAADwK4QcAAHgUTnuh5nE6qGrMumIOADwU4QeerTYEL2cePEsYAoDLIvwAV8P9ggCgTmHNDwAA8CjM/ABmqA2nzwAAkpj5AQAAHobwAwAAPAqnvQB34srTZ2Yt3L6WK8u4+gyAGyP8AHBf1RnYAHgMwg+AmuFugcTd6gHgMqz5AQAAHoWZH6AucnbtkDOzH+52mb+71QPA7RB+ALgHswKbWbizN1Bn1Znws2jRIr300kvKzc1Vy5YttWDBAnXq1KmmywJqN7OCRV2djSEgAbVSnVjzs2LFCk2YMEEzZszQzp071alTJ8XHxys7O7umSwMAAG7GYhiGUdNFXK927dqpdevWWrx4sb3t97//vfr376+kpKSrvr+wsFBBQUEqKChQYGCgucXV1f/jBXBtanrmpzbOTlVWs7vVCLfg7O93rT/tVVJSovT0dE2bNs2hPS4uTps3b670PcXFxSouLra/Lii48C9VYWGh+QUW1/psCeB6TK/kP8jTDzu+Tmpy9T6VuZb3Xct/g5z9b19ln381lX2vazmOMzVey3GvpZ5r+bNwFp91XS7+bld5Hseo5Y4cOWJIMr799luH9jlz5hjNmzev9D0zZ840JLGxsbGxsbHVgS0nJ6dK2aHWz/xcZLFYHF4bhlGh7aLp06dr4sSJ9tfl5eX69ddfFRwcfNn3OKOwsFCRkZHKyckx/3RaHcWYVR1jVjWMV9UxZlXHmFWNs+NlGIZOnTqliIiIKn1erQ8/ISEh8vLyUl5enkN7fn6+wsLCKn2P1WqV1Wp1aGvQoIGrSlRgYCB/+auIMas6xqxqGK+qY8yqjjGrGmfGKygoqMqfU+uv9vL19VWbNm2Ulpbm0J6WlqYOHTrUUFUAAMBd1fqZH0maOHGiHnnkEbVt21Z//OMf9fe//13Z2dkaPXp0TZcGAADcTJ0IP4MGDdLx48f13HPPKTc3V7Gxsfrss88UFRVVo3VZrVbNnDmzwik2XB5jVnWMWdUwXlXHmFUdY1Y11T1edeI+PwAAANeq1q/5AQAAqArCDwAA8CiEHwAA4FEIPwAAwKMQflxo0aJFio6O1o033qg2bdro66+/rumSakRSUpLuuusuBQQEKDQ0VP3799f+/fsd+hiGoVmzZikiIkJ+fn7q0qWL9u3b59CnuLhY48aNU0hIiOrVq6f7779fhw+78Nk0biIpKUkWi0UTJkywtzFeFR05ckTDhg1TcHCw/P39dccddyg9Pd2+nzFzdP78ef33f/+3oqOj5efnp5tvvlnPPfecysvL7X08ecy++uor9e3bVxEREbJYLProo48c9ps1NidOnNAjjzyioKAgBQUF6ZFHHtHJkydd/O1c40pjVlpaqqlTp6pVq1aqV6+eIiIiNHz4cB09etThGNU2ZlV9lhauzfLlyw0fHx/jjTfeML7//ntj/PjxRr169YxDhw7VdGnVrmfPnkZKSoqxd+9eIyMjw+jTp4/RtGlTo6ioyN5n3rx5RkBAgPHhhx8ae/bsMQYNGmSEh4cbhYWF9j6jR482fve73xlpaWnGjh07jK5duxq33367cf78+Zr4WtVi69atxk033WT84Q9/MMaPH29vZ7wc/frrr0ZUVJSRmJho/Otf/zKysrKMtWvXGv/5z3/sfRgzR7NnzzaCg4ONTz75xMjKyjLef/99o379+saCBQvsfTx5zD777DNjxowZxocffmhIMlatWuWw36yx6dWrlxEbG2ts3rzZ2Lx5sxEbG2vcd9991fU1TXWlMTt58qTRvXt3Y8WKFcYPP/xgbNmyxWjXrp3Rpk0bh2NU15gRflzk7rvvNkaPHu3Q1qJFC2PatGk1VJH7yM/PNyQZmzZtMgzDMMrLyw2bzWbMmzfP3ufcuXNGUFCQ8dprrxmGceFfHB8fH2P58uX2PkeOHDFuuOEG44svvqjeL1BNTp06ZcTExBhpaWlG586d7eGH8apo6tSpRseOHS+7nzGrqE+fPsbIkSMd2gYOHGgMGzbMMAzG7Lcu/SE3a2y+//57Q5Lx3Xff2fts2bLFkGT88MMPLv5WrlVZYLzU1q1bDUn2SYHqHDNOe7lASUmJ0tPTFRcX59AeFxenzZs311BV7qOgoECS1KhRI0lSVlaW8vLyHMbLarWqc+fO9vFKT09XaWmpQ5+IiAjFxsbW2TEdO3as+vTpo+7duzu0M14VrV69Wm3bttWDDz6o0NBQ3XnnnXrjjTfs+xmzijp27Kh169bpxx9/lCTt2rVL33zzjXr37i2JMbsSs8Zmy5YtCgoKUrt27ex92rdvr6CgoDo9fhcVFBTIYrHYn61ZnWNWJ+7w7G5++eUXlZWVVXiwalhYWIUHsHoawzA0ceJEdezYUbGxsZJkH5PKxuvQoUP2Pr6+vmrYsGGFPnVxTJcvX64dO3Zo27ZtFfYxXhX99NNPWrx4sSZOnKinn35aW7du1V/+8hdZrVYNHz6cMavE1KlTVVBQoBYtWsjLy0tlZWWaM2eOHn74YUn8PbsSs8YmLy9PoaGhFY4fGhpap8dPks6dO6dp06ZpyJAh9geZVueYEX5cyGKxOLw2DKNCm6d54okntHv3bn3zzTcV9jkzXnVxTHNycjR+/HitWbNGN95442X7MV7/p7y8XG3bttXcuXMlSXfeeaf27dunxYsXa/jw4fZ+jNn/WbFihZYtW6Z33nlHLVu2VEZGhiZMmKCIiAglJCTY+zFml2fG2FTWv66PX2lpqQYPHqzy8nItWrToqv1dMWac9nKBkJAQeXl5VUih+fn5Ff5PwZOMGzdOq1ev1oYNG9SkSRN7u81mk6QrjpfNZlNJSYlOnDhx2T51RXp6uvLz89WmTRt5e3vL29tbmzZt0quvvipvb2/792W8/k94eLhuu+02h7bf//73ys7OlsTfsco89dRTmjZtmgYPHqxWrVrpkUce0ZNPPqmkpCRJjNmVmDU2NptNP//8c4XjHzt2rM6OX2lpqR566CFlZWUpLS3NPusjVe+YEX5cwNfXV23atFFaWppDe1pamjp06FBDVdUcwzD0xBNPaOXKlVq/fr2io6Md9kdHR8tmszmMV0lJiTZt2mQfrzZt2sjHx8ehT25urvbu3VvnxrRbt27as2ePMjIy7Fvbtm01dOhQZWRk6Oabb2a8LvGnP/2pwu0TfvzxR/vDjfk7VtGZM2d0ww2OPwFeXl72S90Zs8sza2z++Mc/qqCgQFu3brX3+de//qWCgoI6OX4Xg09mZqbWrl2r4OBgh/3VOmbXvDQaVXLxUvc333zT+P77740JEyYY9erVMw4ePFjTpVW7P//5z0ZQUJCxceNGIzc3176dOXPG3mfevHlGUFCQsXLlSmPPnj3Gww8/XOllo02aNDHWrl1r7Nixw7j33nvrxCW11+K3V3sZBuN1qa1btxre3t7GnDlzjMzMTOPtt982/P39jWXLltn7MGaOEhISjN/97nf2S91XrlxphISEGFOmTLH38eQxO3XqlLFz505j586dhiQjOTnZ2Llzp/3KJLPGplevXsYf/vAHY8uWLcaWLVuMVq1a1dpL3a80ZqWlpcb9999vNGnSxMjIyHD4LSguLrYfo7rGjPDjQv/zP/9jREVFGb6+vkbr1q3tl3Z7GkmVbikpKfY+5eXlxsyZMw2bzWZYrVbjnnvuMfbs2eNwnLNnzxpPPPGE0ahRI8PPz8+47777jOzs7Gr+NjXj0vDDeFX08ccfG7GxsYbVajVatGhh/P3vf3fYz5g5KiwsNMaPH280bdrUuPHGG42bb77ZmDFjhsMPkSeP2YYNGyr971ZCQoJhGOaNzfHjx42hQ4caAQEBRkBAgDF06FDjxIkT1fQtzXWlMcvKyrrsb8GGDRvsx6iuMbMYhmFc+zwRAABA7caaHwAA4FEIPwAAwKMQfgAAgEch/AAAAI9C+AEAAB6F8AMAADwK4QcAAHgUwg8AAPAohB8AAOBRCD8AAMCjEH4AAIBHIfwAAACP8v8BVrzwUHSyzyMAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x480 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_val[target_list].plot.hist(bins=100, subplots=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "08199065",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = df_train[variable_list + target_list ]\n",
    "X_val = df_val[variable_list + target_list]\n",
    "X_test = df_test[variable_list + target_list ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "02ed1cb4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rmse_loss(x, y):\n",
    "    return ((x-y)**2).mean()**.5"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5eb81bf",
   "metadata": {},
   "source": [
    "## linear model with sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "485019d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.impute import SimpleImputer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e00a025a",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RMSE:\n",
      "\tBMag_ha: 51.999\n",
      "\tV_ha: 96.744\n",
      "R2 score:\n",
      "\tBMag_ha: 0.742\n",
      "\tV_ha: 0.747\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_10413/256522388.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
      "  X_trainval = X_train.append(X_val)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[None, None]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pure linear model\n",
    "X_trainval = X_train.append(X_val)\n",
    "\n",
    "imputer = SimpleImputer().fit(X_trainval[variable_list])\n",
    "X_train_ = imputer.transform(X_trainval[variable_list])\n",
    "model = LinearRegression().fit(\n",
    "    X_train_, \n",
    "    X_trainval[target_list]\n",
    ")\n",
    "y_pred = model.predict(imputer.transform(X_test[variable_list]))\n",
    "y_pred = np.clip(y_pred, a_min=0, a_max=None)\n",
    "rmse = []\n",
    "r2 = []\n",
    "for i, name in enumerate(target_list):\n",
    "    rmse.append(rmse_loss(X_test[target_list[i]], y_pred[:, i]))\n",
    "    r2.append(r2_score(X_test[target_list[i]], y_pred[:, i]))\n",
    "    \n",
    "print(f\"RMSE:\")\n",
    "[print(f\"\\t{target}: {score:.3f}\") for target, score in zip(target_list, rmse)]\n",
    "       \n",
    "print(f\"R2 score:\")\n",
    "[print(f\"\\t{target}: {score:.3f}\") for target, score in zip(target_list, r2)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "73a90ee1",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred_train = model.predict(imputer.transform(X_train[variable_list]))\n",
    "# np.savetxt(\"linreg_train.csv\", y_pred_train)\n",
    "\n",
    "y_pred_val = model.predict(imputer.transform(X_val[variable_list]))\n",
    "# np.savetxt(\"linreg_val.csv\", y_pred_val)\n",
    "\n",
    "y_pred_test = model.predict(imputer.transform(X_test[variable_list]))\n",
    "# np.savetxt(\"linreg_test.csv\", y_pred_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "cb2e5a83",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RMSE:\n",
      "\tBMag_ha: 54.368\n",
      "\tV_ha: 95.053\n",
      "R2 score:\n",
      "\tBMag_ha: 0.736\n",
      "\tV_ha: 0.764\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[None, None]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred_val = np.clip(y_pred_val, a_min=0, a_max=None)\n",
    "rmse = []\n",
    "r2 = []\n",
    "for i, name in enumerate(target_list):\n",
    "    rmse.append(rmse_loss(X_val[target_list[i]], y_pred_val[:, i]))\n",
    "    r2.append(r2_score(X_val[target_list[i]], y_pred_val[:, i]))\n",
    "\n",
    "print(f\"RMSE:\")\n",
    "[print(f\"\\t{target}: {score:.3f}\") for target, score in zip(target_list, rmse)]\n",
    "       \n",
    "print(f\"R2 score:\")\n",
    "[print(f\"\\t{target}: {score:.3f}\") for target, score in zip(target_list, r2)]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c66446ac",
   "metadata": {},
   "source": [
    "## RF with sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "6456ddfd",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "56441402",
   "metadata": {},
   "outputs": [],
   "source": [
    "imputer = SimpleImputer(strategy=\"constant\", fill_value=-100).fit(X_trainval[variable_list])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "65256bf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import ParameterGrid\n",
    "param_grid = {\n",
    "    # Number of features to consider at every split\n",
    "   'max_features': np.arange(0.1, 1.1, 0.1),\n",
    "    # depth of tree (None means full depth)\n",
    "   'max_depth': list(np.arange(5, 21)) + [None],\n",
    "    # Minimum number of samples required at each leaf node\n",
    "   'min_samples_leaf': [1] + list(np.arange(2, 17, 2)),\n",
    "    # Method of selecting samples for training each tree\n",
    "    \"max_samples\": np.arange(0.1, 1.1, 0.1),\n",
    "   'bootstrap': [True],\n",
    "}\n",
    "pgrid = ParameterGrid(param_grid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3cbedf8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm.auto import tqdm\n",
    "best_score = -np.inf\n",
    "best_p = {}\n",
    "best_rf = None\n",
    "pbar = tqdm(pgrid)\n",
    "for p in pbar:\n",
    "    pbar.set_postfix_str(str(p))\n",
    "    pbar.refresh()\n",
    "    rf = RandomForestRegressor(1000, n_jobs=-1, oob_score=True, **p).fit(\n",
    "        imputer.transform(X_trainval[variable_list]), \n",
    "        X_trainval[target_list]\n",
    "    )\n",
    "    if rf.oob_score_ > best_score:\n",
    "        best_score = rf.oob_score_\n",
    "        best_p = p\n",
    "        best_rf = rf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed215d65",
   "metadata": {},
   "outputs": [],
   "source": [
    "# print(f\"best params: {best_p}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "34f9d264",
   "metadata": {},
   "outputs": [],
   "source": [
    "best_p = {'bootstrap': True, 'max_depth': 11, 'max_features': 0.9, 'max_samples': 0.2, 'min_samples_leaf': 6}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "7e25184b",
   "metadata": {},
   "outputs": [],
   "source": [
    "best_rf = RandomForestRegressor(5000, n_jobs=-1, oob_score=True, **best_p).fit(\n",
    "    imputer.transform(X_trainval[variable_list]), \n",
    "    X_trainval[target_list]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "6c3092c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RMSE:\n",
      "\tBMag_ha: 50.176\n",
      "\tV_ha: 87.487\n",
      "R2 score:\n",
      "\tBMag_ha: 0.775\n",
      "\tV_ha: 0.800\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[None, None]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred = best_rf.predict(imputer.transform(X_val[variable_list]))\n",
    "y_pred = np.clip(y_pred, a_min=0, a_max=None)\n",
    "rmse = []\n",
    "r2 = []\n",
    "for i, name in enumerate(target_list):\n",
    "    rmse.append(rmse_loss(X_val[target_list[i]], y_pred[:, i]))\n",
    "    r2.append(r2_score(X_val[target_list[i]], y_pred[:, i]))\n",
    "print(f\"RMSE:\")\n",
    "[print(f\"\\t{target}: {score:.3f}\") for target, score in zip(target_list, rmse)]\n",
    "       \n",
    "print(f\"R2 score:\")\n",
    "[print(f\"\\t{target}: {score:.3f}\") for target, score in zip(target_list, r2)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "12454bdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# np.savetxt(\"rf_val.csv\", y_pred)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
